Lab 11

Author

Breyonne Williams

Steps 1-5

library(data.table)

cv_states <- fread("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")

state_pops <- fread("https://raw.githubusercontent.com/COVID19Tracking/associated-data/master/us_census_data/us_census_2018_population_estimates_states.csv")

state_pops$abb <- state_pops$state
state_pops$state <- state_pops$state_name
state_pops$state_name <- NULL

cv_states <- merge(cv_states, state_pops, by.x = "state", by.y = "state")

dim(cv_states)
[1] 58094     9
head(cv_states)
     state       date fips cases deaths geo_id population pop_density abb
1: Alabama 2020-03-13    1     6      0      1    4887871    96.50939  AL
2: Alabama 2020-03-14    1    12      0      1    4887871    96.50939  AL
3: Alabama 2020-03-15    1    23      0      1    4887871    96.50939  AL
4: Alabama 2020-03-16    1    29      0      1    4887871    96.50939  AL
5: Alabama 2020-03-17    1    39      0      1    4887871    96.50939  AL
6: Alabama 2020-03-18    1    51      0      1    4887871    96.50939  AL
tail(cv_states)
     state       date fips  cases deaths geo_id population pop_density abb
1: Wyoming 2023-03-18   56 185640   2009     56     577737    5.950611  WY
2: Wyoming 2023-03-19   56 185640   2009     56     577737    5.950611  WY
3: Wyoming 2023-03-20   56 185640   2009     56     577737    5.950611  WY
4: Wyoming 2023-03-21   56 185800   2014     56     577737    5.950611  WY
5: Wyoming 2023-03-22   56 185800   2014     56     577737    5.950611  WY
6: Wyoming 2023-03-23   56 185800   2014     56     577737    5.950611  WY
str(cv_states)
Classes 'data.table' and 'data.frame':  58094 obs. of  9 variables:
 $ state      : chr  "Alabama" "Alabama" "Alabama" "Alabama" ...
 $ date       : IDate, format: "2020-03-13" "2020-03-14" ...
 $ fips       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ cases      : int  6 12 23 29 39 51 78 106 131 157 ...
 $ deaths     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ geo_id     : int  1 1 1 1 1 1 1 1 1 1 ...
 $ population : int  4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 ...
 $ pop_density: num  96.5 96.5 96.5 96.5 96.5 ...
 $ abb        : chr  "AL" "AL" "AL" "AL" ...
 - attr(*, ".internal.selfref")=<externalptr> 
 - attr(*, "sorted")= chr "state"
cv_states$date <- as.Date(cv_states$date, format="%Y-%m-%d")

state_list <- unique(cv_states$state)
cv_states$state <- factor(cv_states$state, levels = state_list)
abb_list <- unique(cv_states$abb)
cv_states$abb <- factor(cv_states$abb, levels = abb_list)

cv_states <- cv_states[order(cv_states$state, cv_states$date),]

str(cv_states)
Classes 'data.table' and 'data.frame':  58094 obs. of  9 variables:
 $ state      : Factor w/ 52 levels "Alabama","Alaska",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ date       : Date, format: "2020-03-13" "2020-03-14" ...
 $ fips       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ cases      : int  6 12 23 29 39 51 78 106 131 157 ...
 $ deaths     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ geo_id     : int  1 1 1 1 1 1 1 1 1 1 ...
 $ population : int  4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 4887871 ...
 $ pop_density: num  96.5 96.5 96.5 96.5 96.5 ...
 $ abb        : Factor w/ 52 levels "AL","AK","AZ",..: 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, ".internal.selfref")=<externalptr> 
head(cv_states)
     state       date fips cases deaths geo_id population pop_density abb
1: Alabama 2020-03-13    1     6      0      1    4887871    96.50939  AL
2: Alabama 2020-03-14    1    12      0      1    4887871    96.50939  AL
3: Alabama 2020-03-15    1    23      0      1    4887871    96.50939  AL
4: Alabama 2020-03-16    1    29      0      1    4887871    96.50939  AL
5: Alabama 2020-03-17    1    39      0      1    4887871    96.50939  AL
6: Alabama 2020-03-18    1    51      0      1    4887871    96.50939  AL
tail(cv_states)
     state       date fips  cases deaths geo_id population pop_density abb
1: Wyoming 2023-03-18   56 185640   2009     56     577737    5.950611  WY
2: Wyoming 2023-03-19   56 185640   2009     56     577737    5.950611  WY
3: Wyoming 2023-03-20   56 185640   2009     56     577737    5.950611  WY
4: Wyoming 2023-03-21   56 185800   2014     56     577737    5.950611  WY
5: Wyoming 2023-03-22   56 185800   2014     56     577737    5.950611  WY
6: Wyoming 2023-03-23   56 185800   2014     56     577737    5.950611  WY
range_date <- range(cv_states$date)
range_cases <- range(cv_states$cases)
range_deaths <- range(cv_states$deaths)

range_date
[1] "2020-01-21" "2023-03-23"
range_cases
[1]        1 12169158
range_deaths
[1]      0 104277
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':

    between, first, last
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(zoo)

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
state_list <- unique(cv_states$state)

for (i in 1:length(state_list)) {
  cv_subset = subset(cv_states, state == state_list[i])
  cv_subset <- cv_subset[order(cv_subset$date),]

  
  cv_subset$new_cases = cv_subset$cases[1]
  cv_subset$new_deaths = cv_subset$deaths[1]

 
  for (j in 2:nrow(cv_subset)) {
    cv_subset$new_cases[j] <- cv_subset$cases[j] - cv_subset$cases[j - 1]
    cv_subset$new_deaths[j] <- cv_subset$deaths[j] - cv_subset$deaths[j - 1]
  }

cv_states$new_cases[cv_states$state==state_list[i]] = cv_subset$new_cases
cv_states$new_deaths[cv_states$state==state_list[i]] = cv_subset$new_deaths

}



cv_states <- cv_states %>% dplyr::filter(date >= "2021-06-01")


p1 <- ggplot(cv_states, aes(x = date, y = new_cases, color = state)) +
  geom_line() + geom_point(size = 1, alpha = 0.5)
ggplotly(p1)
p1 <- NULL

p2 <- ggplot(cv_states, aes(x = date, y = new_deaths, color = state)) +
  geom_line() + geom_point(size = 1, alpha = 0.5)
ggplotly(p2)
p2 <- NULL

cv_states$new_cases[cv_states$new_cases < 0] = 0
cv_states$new_deaths[cv_states$new_deaths < 0] = 0


for (i in 1:length(state_list)) {
  cv_subset = subset(cv_states, state == state_list[i])

 
  cv_subset$cases <- cv_subset$cases[1]
  cv_subset$deaths <- cv_subset$deaths[1]

 
  for (j in 2:nrow(cv_subset)) {
    cv_subset$cases[j] <- cv_subset$new_cases[j] + cv_subset$cases[j - 1]
    cv_subset$deaths[j] <- cv_subset$new_deaths[j] + cv_subset$deaths[j - 1]
  }

cv_states$cases[cv_states$state==state_list[i]] = cv_subset$cases
cv_states$deaths[cv_states$state==state_list[i]] = cv_subset$deaths
}


cv_states$new_cases <- zoo::rollmean(cv_states$new_cases, k = 7, fill = NA, align = 'right') %>% round(digits = 0)
cv_states$new_deaths <- zoo::rollmean(cv_states$new_deaths, k = 7, fill = NA, align = 'right') %>% round

p2<-ggplot(cv_states, aes(x = date, y = new_deaths, color = state)) + geom_line() + geom_point(size = .5, alpha = 0.5)
ggplotly(p2)
cv_states$per100k = as.numeric(format(round(cv_states$cases / (cv_states$population / 100000), 1), nsmall = 1))
cv_states$newper100k = as.numeric(format(round(cv_states$new_cases / (cv_states$population / 100000), 1), nsmall = 1))
Warning: NAs introduced by coercion
cv_states$deathsper100k = as.numeric(format(round(cv_states$deaths / (cv_states$population / 100000), 1), nsmall = 1))
cv_states$newdeathsper100k = as.numeric(format(round(cv_states$new_deaths / (cv_states$population / 100000), 1), nsmall = 1))
Warning: NAs introduced by coercion
cv_states = cv_states %>% mutate(naive_CFR = round((deaths*100/cases),2))

cv_states_today <- subset(cv_states, date == max(cv_states$date))

Scatterplots

library(plotly)
cv_states_today %>%
  plot_ly(x = ~pop_density, y = ~cases, 
          type = 'scatter', mode = 'markers', color = ~state,
          size = ~population, sizes = c(5, 70), marker = list(sizemode = 'diameter', opacity = 0.5))
Warning: Ignoring 1 observations
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
cv_states_today_filter <- cv_states_today %>% filter(state!="District of Columbia")

cv_states_today_filter %>% 
  plot_ly(x = ~pop_density, y = ~cases, 
          type = 'scatter', mode = 'markers', color = ~state,
          size = ~population, sizes = c(5, 70), marker = list(sizemode='diameter', opacity=0.5))
Warning: Ignoring 1 observations

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
cv_states_today_filter %>% 
  plot_ly(x = ~pop_density, y = ~deathsper100k,
          type = 'scatter', mode = 'markers', color = ~state,
          size = ~population, sizes = c(5, 70), marker = list(sizemode = 'diameter', opacity = 0.5))
Warning: Ignoring 1 observations

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
cv_states_today_filter %>% 
  plot_ly(x = ~pop_density, y = ~deathsper100k,
          type = 'scatter', mode = 'markers', color = ~state,
          size = ~population, sizes = c(5, 70), marker = list(sizemode='diameter', opacity=0.5),
          hoverinfo = 'text',
          text = ~paste( paste(state, ":", sep=""), paste(" Cases per 100k: ", per100k, sep="") , 
                         paste(" Deaths per 100k: ", "Population Density", sep=""), sep = "<br>")) %>%
  layout(title = "Population-normalized COVID-19 deaths (per 100k) vs. population density for US states",
                  yaxis = list(title = "Deaths per 100k"), xaxis = list(title = "Population Density"),
         hovermode = "compare") 
Warning: Ignoring 1 observations

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
p <- ggplot(cv_states_today, aes(x = pop_density, y = deathsper100k, size = population)) +
  geom_point() +
  geom_smooth(method = 'lm', color = 'blue')
ggplotly(p)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
Warning: The following aesthetics were dropped during statistical transformation: size
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?
plot_ly(cv_states, x = ~date, y = ~new_cases, type = "scatter", mode = "lines")
cv_states %>% filter(state=="Florida") %>% plot_ly(x = ~date, y = ~new_cases, type = "scatter", mode = "lines") %>% add_trace(x = ~date, y = ~new_deaths, type = "scatter", mode = "lines") 
library(tidyr)

cv_states_mat <- cv_states %>% select(state, date, new_cases) %>% dplyr::filter(date > as.Date("2021-06-15"))
cv_states_mat2 <- as.data.frame(pivot_wider(cv_states_mat, names_from = state, values_from = new_cases))
rownames(cv_states_mat2) <- cv_states_mat2$date
cv_states_mat2$date <- NULL
cv_states_mat2 <- as.matrix(cv_states_mat2)


plot_ly(x = colnames(cv_states_mat2), y = rownames(cv_states_mat2),
                   z = ~cv_states_mat2,
                   type = "heatmap",
                   showscale = TRUE)
cv_states_mat <- cv_states %>% select(state, date, newper100k) %>% dplyr::filter(date > as.Date("2021-06-15"))
cv_states_mat2 <- as.data.frame(pivot_wider(cv_states_mat, names_from = state, values_from = newper100k))
rownames(cv_states_mat2) <- cv_states_mat2$date
cv_states_mat2$date <- NULL
cv_states_mat2 <- as.matrix(cv_states_mat2)

plot_ly(x = colnames(cv_states_mat2), y = rownames(cv_states_mat2),
                   z = ~cv_states_mat2,
                   type = "heatmap",
                   showscale = TRUE)
filter_dates <- seq(as.Date("2021-06-15"), as.Date("2021-11-01"), by = 14)  

cv_states_mat <- cv_states %>% select(state, date, newper100k) %>% filter(date %in% filter_dates)
cv_states_mat2 <- as.data.frame(pivot_wider(cv_states_mat, names_from = state, values_from = newper100k))
rownames(cv_states_mat2) <- cv_states_mat2$date
cv_states_mat2$date <- NULL
cv_states_mat2 <- as.matrix(cv_states_mat2)


plot_ly(x = colnames(cv_states_mat2), y = rownames(cv_states_mat2),
                   z = ~cv_states_mat2,
                   type = "heatmap",
                   showscale = TRUE)
pick.date = "2021-10-15"

cv_per100 <- cv_states %>% filter(date==pick.date) %>% select(state, abb, newper100k, cases, deaths) # select data
cv_per100$state_name <- cv_per100$state
cv_per100$state <- cv_per100$abb
cv_per100$abb <- NULL

cv_per100$hover <- with(cv_per100, paste(state_name, '<br>', "Cases per 100k: ", newper100k, '<br>', "Cases: ", cases, '<br>', "Deaths: ", deaths))

set_map_details <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showlakes = TRUE,
  lakecolor = toRGB('white')
)

shadeLimit <- 125

fig <- plot_geo(cv_per100, locationmode = 'USA-states') %>% 
  add_trace(
    z = ~newper100k, text = ~hover, locations = ~state,
    color = ~newper100k, colors = 'Purples'
  )
fig <- fig %>% colorbar(title = paste0("Cases per 100k: ", pick.date), limits = c(0,shadeLimit))
fig <- fig %>% layout(
    title = paste('Cases per 100k by State as of ', pick.date, '<br>(Hover for value)'),
    geo = set_map_details
  )
fig_pick.date <- fig

cv_per100 <- cv_states_today %>%  select(state, abb, newper100k, cases, deaths) # select data
cv_per100$state_name <- cv_per100$state
cv_per100$state <- cv_per100$abb
cv_per100$abb <- NULL

cv_per100$hover <- with(cv_per100, paste(state_name, '<br>', "Cases per 100k: ", newper100k, '<br>', "Cases: ", cases, '<br>', "Deaths: ", deaths))

set_map_details <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showlakes = TRUE,
  lakecolor = toRGB('white')
)

fig <- plot_geo(cv_per100, locationmode = 'USA-states') %>% 
  add_trace(
    z = ~newper100k, text = ~hover, locations = ~state,
    color = ~newper100k, colors = 'Purples'
  )
fig <- fig %>% colorbar(title = paste0("Cases per 100k: ", Sys.Date()), limits = c(0,shadeLimit))
fig <- fig %>% layout(
    title = paste('Cases per 100k by State as of', Sys.Date(), '<br>(Hover for value)'),
    geo = set_map_details
  )
fig_Today <- fig


subplot(fig_pick.date, fig_Today, nrows = 2, margin = .05)